# -*- coding: utf-8 -*-
"""
Created on Sat Oct  8 09:31:16 2016
承接 1688_crawler_from_product.py 出来的数据，是
【1688_crawler_from_product】数据的补充

根据小凡提供的那份店名即人名的数据，爬取店名，将人名替换掉

数据地址：/mnt/hgfs/VMWare/crawler_outcome/阿里巴巴-企业名称为人名.xlsx
@author: stevenkwong
"""
from selenium import webdriver
import time
import pandas as pd

def whatstime():
    """Return the current local time formatted as a log-line prefix.

    Returns
    -------
    str
        e.g. ``"[2016-Oct-08  09:31:16]: "``
    """
    # Timestamp format for log prefixes (note the two spaces before %H).
    fmt = "[%Y-%b-%d  %H:%M:%S]: "
    return time.strftime(fmt, time.localtime())

def get_cat_link(url):
    """Open *url* in a Firefox browser and log the landing URL.

    Parameters
    ----------
    url : str
        The page to open (here, a 1688 category/shop page).

    Returns
    -------
    selenium.webdriver.Firefox
        The live browser instance. The original version discarded this
        handle, leaking the browser process; the caller is now responsible
        for calling ``browser.quit()`` when finished.
    """
    browser = webdriver.Firefox()
    print(whatstime() + 'opening:\n'+ url +'... ...')
    browser.get(url)
    print(whatstime() + 'got web:\n' )
    # current_url may differ from the requested url after redirects.
    print(browser.current_url)
    return browser
    
if __name__ == '__main__':
    # Load the spreadsheet of merchants whose shop name is a person's name.
    filepath = '/mnt/hgfs/VMWare/crawler_outcome/阿里巴巴-企业名称为人名.xlsx'
    origin_table = pd.read_excel(filepath)
    origin_table = origin_table[['商户名称','店铺类别','阿里巴巴网址']]
    # .ix was deprecated in pandas 0.20 and removed in 1.0; .loc is the
    # label-based equivalent for this (row label 2, column name) access.
    url = origin_table.loc[2, '阿里巴巴网址']
    # The original `for each in origin_table` iterated COLUMN NAMES, which
    # was almost certainly not intended; iterate rows instead.
    for _, row in origin_table.iterrows():
        # TODO: crawl each shop, e.g. get_cat_link(row['阿里巴巴网址'])
        pass
    

